#########################
# Ethan vanderWilden
# Functions File
# Stigmatizing the Radical Right (with Laia Balcells, Sergi Martinez, and Vicente Valentim)

# Last modified: February 2025

# DESCRIPTION: This file contains functions to expedite the analysis. It should be loaded with
#               source("functionsBMVV.R) at the beginning of the analysis script.

#########################

# Function A takes in a dataset and returns regression coefficients & SE on the validation outcomes
# (second order perceptions of how different actors view the radical right).
# If you want to include control variables (here, age, gender, education, income, pol.know,
# SP.nationalism, CCAA.nationalism , and a regional FE), set "controls" to TRUE.

# Function B takes in a dataset and specific outcome variable name, and then returns regression
# coefficients & SE (including necessary corrections for multiple hypotheses)
# If you want to include control variables (here, age, gender, education, income, pol.know,
# SP.nationalism, CCAA.nationalism , and a regional FE), set "controls" to TRUE.

# Function C takes in a dataframe of regression coefficients (either from Function A or B)
# and returns a coefficient plot. The user can adjust the title of the plot (mytitle), whether the
# confidence intervals should reflect multiple hypothesis corrections (bh), the range of
# the X axis centered at 0 (range), and the length of interval between X axis labels (jump).

# Function D takes in a dataset and specific outcome variable name and returns a coefficient 
# plot, where the coefficients reflect the differences in means between relevant treatment groups 
# (to assess H2) on the specified outcome. The user can adjust the title of the plot (mytitle), 
# the range of the X axis centered at 0 (range), and the length of interval between X axis labels (jump).



#### A. Get regression coefficients for validation estimates
# (does cued stima move 2nd order perception of stigma from source?)
validation_coefs <- function(df, controls = F){
  
  ## 1. Create dataframe to store results
  results <- data.frame(
    treatments = c(rep(c("Peers", "Media", "Political\nElites"), 3)),
    Outcome = c(rep("Peers\n(2nd order)\n", 3), rep("Media\n(2nd order)\n", 3), rep("Pol. Elite\n(2nd order)\n", 3)),
    estimate = rep(NA, 9), se = rep(NA, 9)
  )
  
  ## 2. Get effect estimates and standard errors (9 regressions)
  if (controls == F){  #For bivariate model
    for (i in 1:3){
      results[i, 3:4] <- coef(summary(lm(peer.accept ~ factor(treat), data = subset(df, treat %in% c(0, i)))))[2, 1:2]
      results[(i+3), 3:4] <- coef(summary(lm(media.accept ~ factor(treat), data = subset(df, treat %in% c(0, i)))))[2, 1:2]
      results[(i+6), 3:4] <- coef(summary(lm(elite.accept ~ factor(treat), data = subset(df, treat %in% c(0, i)))))[2, 1:2]
    }
    
    
  } else{ #for model including pre-treatment controls
    for (i in 1:3){ #get estimate and standard error for each outcome
      results[i, 3:4] <- coef(summary(lm(peer.accept ~ factor(treat) + age + gender + education + income + 
                                           pol.know + SP.nationalism + CCAA.nationalism + factor(region),
                                         data = subset(df, treat %in% c(0, i)))))[2, 1:2]
      results[(i+3), 3:4] <- coef(summary(lm(media.accept ~ factor(treat) + age + gender + education + income + 
                                               pol.know + SP.nationalism + CCAA.nationalism + factor(region),
                                             data = subset(df, treat %in% c(0, i)))))[2, 1:2]
      results[(i+6), 3:4] <- coef(summary(lm(elite.accept ~ factor(treat) + age + gender + education + income + 
                                               pol.know + SP.nationalism + CCAA.nationalism + factor(region),
                                             data = subset(df, treat %in% c(0, i)))))[2, 1:2]
    }
  }
  
  ## 3. Return results
  return(results)
}


#### B. Get regression coefficients for other regression estimates
# (does treatment induce greater stigma against Vox?)
reg_coefs <- function(df, outcome = "gen.stigma", controls = F){
  
  ## 1. Create dataframe to store results
  results <- data.frame(
    treatments = c(rep(c("Peers", "Media", "Political\nElites"), 3), "Peer vs. Media/Pol. Elite"),
    Subgroup = c(rep("Full\nsample\n", 3), rep("Right-wing\nsample\n", 3), rep("Left-wing\nsample\n", 3), "Full\nsample\n"),
    estimate = rep(NA, 10), se = rep(NA, 10)
  )
  
  
  ## 2. Get effect estimates and standard errors (9 regressions)
  if (controls == F){  #For bivariate model
    for (i in 1:3){ 
      results[(i), 3:4] <- coef(summary(lm(get(outcome) ~ factor(treat), data = subset(df, treat %in% c(0, i)))))[2, 1:2]
      results[(i+3), 3:4] <- coef(summary(lm(get(outcome) ~ factor(treat), data = subset(df, treat %in% c(0, i) & ideology >= 5))))[2, 1:2]
      results[(i+6), 3:4] <- coef(summary(lm(get(outcome) ~ factor(treat), data = subset(df, treat %in% c(0, i) & ideology < 5))))[2, 1:2]
    }
    
  } else{ #for model including pre-treatment controls
    for (i in 1:3){ 
      results[(i), 3:4] <- coef(summary(lm(get(outcome) ~ factor(treat) + age + gender + education + income + 
                                               pol.know + SP.nationalism + CCAA.nationalism + factor(region),
                                             data = subset(df, treat %in% c(0, i)))))[2, 1:2]
      results[(i+3), 3:4] <- coef(summary(lm(get(outcome) ~ factor(treat) + age + gender + education + income + 
                                                pol.know + SP.nationalism + CCAA.nationalism + factor(region),
                                              data = subset(df, treat %in% c(0, i) & ideology >= 5))))[2, 1:2]
      results[(i+6), 3:4] <- coef(summary(lm(get(outcome) ~ factor(treat) + age + gender + education + income + 
                                                pol.know + SP.nationalism + CCAA.nationalism + factor(region),
                                              data = subset(df, treat %in% c(0, i) & ideology < 5))))[2, 1:2]
    }
  }
  
  ## 3. Calculate regression estimate and SE for H2 comparison (for BH correction)
  #create a variable that takes 1 if respondent got the "peer" prime, 0 if "media" or "elite" prime
  df$is.peer <- case_match(df$treat, 0 ~ NA, 1 ~ 1, 2 ~ 0, 3 ~ 0, .default = NA)
  
  if(controls == F){
    results[10, 3:4] <- coef(summary(lm(get(outcome) ~ is.peer, data = df)))[2, 1:2]
  } else {
    results[10, 3:4] <- coef(summary(lm(get(outcome) ~ is.peer + age + gender + education + income + 
                                          pol.know + SP.nationalism + CCAA.nationalism,
                                        data = df)))[2, 1:2]
  }
  
  ## 4. Apply Benjamini-Hochberg corrections for 4 comparisons (full sample source-specific + comparison between peer/non-peer)
  results$bh_se <- results$se #create column to store BH adjusted standard errors for plotting confidence intervals
  ranking = rank(pnorm(-abs(results[c(1, 2, 3, 10), 3]/results[c(1, 2, 3, 10), 4]), 0, 1), ties.method = 'first')
  results[c(1, 2, 3, 10), 5] <- results[c(1, 2, 3, 10),5]*(abs(qnorm(ranking*0.025/length(ranking)))/1.96) #adjust SE for confidence interval
  
  
  ## 5. Return results
  return(results)
}


#### C. Plot coefficients (90 and 95% confidence intervals)
get_plot <- function(coef_data, mytitle, bh = T, range = 1, jump = 0.5){
  
  leg.title <- names(coef_data)[2]      #save legend title (2nd column or coefficient data)  
  names(coef_data)[2] <- "legend.var"   #rename 2nd column as "legend variable"

  #Ensure order of comparison order remains consistent
  coef_data$legend.var <- factor(coef_data$legend.var, levels = coef_data$legend.var[c(1, 4, 7)])
  coef_data$treatments <- factor(coef_data$treatments, levels = coef_data$treatments[1:3])
  
  #if Benjamini-Hochberg corrections are desired, overwrite SE with BH se's
  if(bh == T & ncol(coef_data) == 5){coef_data$se <- coef_data$bh_se}
  
  #plot 90 and 95% confidence intervals and coefficient point estimates
  myplot <- ggplot(data = coef_data[c(1:9), ]) +
    aes(x = estimate, y = treatments, color = legend.var) +
    geom_vline(xintercept = 0, linetype = "dashed")+
    geom_pointrange(aes(xmin= estimate-1.96*se, xmax= estimate+1.96*se), 
                    shape = 16, linewidth = 1.5,
                    position = position_dodge(width = -1/2)) +
    geom_pointrange(aes(xmin= estimate-1.645*se, xmax= estimate+1.645*se), 
                    shape = 16, size = 1.1, lwd = 2.8,
                    position = position_dodge(width = -1/2)) +
    scale_color_manual(values = c("black", "darkgrey", "lightgrey")) +
    labs(x = "\nTreatment effect (10 pt. scale, positive values = more stigma)",
         y = "Experimental Prime (vs. control) \n",
         title = mytitle,
         color = leg.title) +
    theme_bw()+
    theme(axis.text = element_text(size = 14),
          axis.title = element_text(size = 14, face = "bold"),
          legend.title = element_text(size = 14, face = "italic", hjust = 0.5),
          legend.text = element_text(size = 12),
          plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
          legend.position = c(0.85, 0.75),
          legend.box.background = element_rect(size = 1.1),
          panel.grid.major.y = element_blank(),
          panel.grid.minor.x = element_blank()) +
    scale_x_continuous(limits = c(-range, range), breaks = seq(-range, range, jump))+
    scale_y_discrete(limits=rev)
  
  return(myplot)
}


#### D. Plot within-treatment comparisons to assess differential efficacy
get_paired <- function(df, outcome = 'gen.stigma', range = 0.5, jump = 0.25, controls = F){
  
  
  ## 1. Create dataframe to store results
  results <- data.frame(
    treatments = c("Peer vs. \nnon-Peer (H2)", "Peer vs.\nMedia", "Peer vs.\nPolitical Elite", "Media vs.\nPolitical Elite"),
    estimate = rep(NA, 4),
    se = rep(NA, 4))
  
  results$treatments <- factor(results$treatments, levels = results$treatments[1:4]) #assure axis order
  
  
  ## 2. Specify treatment indicators
  df$is.peer <- case_match(df$treat, 0 ~ NA, 1 ~ 1, 2 ~ 0, 3 ~ 0, .default = NA)
  df$media.vs.elite <- case_match(df$treat, 0 ~ NA, 1 ~ NA, 2 ~ 1, 3 ~ 0, .default = NA)
  

  ## 3. Run regressions to get treatment effects and SEs
  if (controls == F){ #bivariate models
    results[1, 2:3] <- coef(summary(lm(get(outcome) ~ is.peer, data = df)))[2, 1:2]
    results[2, 2:3] <- coef(summary(lm(get(outcome) ~ is.peer, data = subset(df, treat %in% c(1,2)))))[2, 1:2]
    results[3, 2:3] <- coef(summary(lm(get(outcome) ~ is.peer, data = subset(df, treat %in% c(1,3)))))[2, 1:2]
    results[4, 2:3] <- coef(summary(lm(get(outcome) ~ media.vs.elite, data = df)))[2, 1:2]
    
  } else { #include controls
    results[1, 2:3] <- coef(summary(lm(get(outcome) ~ is.peer + age + gender + education + income + pol.know +
                                         SP.nationalism + CCAA.nationalism + factor(region),
                                       data = df)))[2, 1:2]
    results[2, 2:3] <- coef(summary(lm(get(outcome) ~ is.peer + age + gender + education + income + pol.know + 
                                         SP.nationalism + CCAA.nationalism + factor(region),
                                       data = subset(df, treat %in% c(1,2)))))[2, 1:2]
    results[3, 2:3] <- coef(summary(lm(get(outcome) ~ is.peer + age + gender + education + income + pol.know + 
                                         SP.nationalism + CCAA.nationalism + factor(region),
                                       data = subset(df, treat %in% c(1,3)))))[2, 1:2]
    results[4, 2:3] <- coef(summary(lm(get(outcome) ~ media.vs.elite + age + gender + education + income + pol.know + 
                                         SP.nationalism + CCAA.nationalism + factor(region),
                                       data = df)))[2, 1:2]
  }
  
  ## 4. Plot Plot 90 and 95% CIs
  myplot <- ggplot(data = results) +
    aes(x = estimate, y = treatments) +
    geom_vline(xintercept = 0, size = 0.5)+
    geom_pointrange(aes(xmin= estimate-1.96*se, xmax= estimate+1.96*se), 
                    shape = 16, size = 0.8, linewidth = 1.5,
                    position = position_dodge(width = -1/2)) +
    geom_pointrange(aes(xmin= estimate-1.645*se, xmax= estimate+1.645*se), 
                    shape = 16, size = 1.1, linewidth = 2.8,
                    position = position_dodge(width = -1/2)) +
    labs(x = "\nDifference in means (10 pt. scale, positive values = more stigma)", y = "", title = "") +
    theme_bw()+
    theme(axis.text = element_text(size = 14),
          axis.title = element_text(size = 14, face = 'bold'),
          #plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
          panel.grid.minor = element_blank(),
          panel.grid.major.y = element_blank()) +
    scale_x_continuous(limits = c(-range, range), breaks = seq(-range, range, jump)) +
    scale_y_discrete(limits = rev)
  
  return(myplot)
}





